In [1]:
import pandas as pd
df = {}
In [2]:
from clustergrammer2 import net
>> clustergrammer2 backend version 0.4.0
In [3]:
import ipywidgets as widgets
import numpy as np
from bqplot import pyplot as plt
import bqplot
In [4]:
from glob import glob

Load X-shift cluster ID

In [5]:
df['cell_type'] = pd.read_excel('../data/big_data/mmc2__codex_cell_type_info.xlsx', index_col=0)
df['cell_type'].shape
Out[5]:
(58, 1)
In [6]:
ct_dict = {}
cell_types = []
for inst_id in df['cell_type'].index.tolist():
    inst_ct = df['cell_type']['Imaging phenotype (cell type)'].loc[inst_id]
    cell_types.append(inst_ct)
    ct_dict[inst_id] = inst_ct
In [7]:
cell_types = sorted(list(set(cell_types)))
print(len(cell_types))
cell_types
28
Out[7]:
['B cells',
 'B220(+) DN T cells',
 'CD106(+)CD16/32(+)CD31(+) stroma',
 'CD106(+)CD16/32(+)CD31(-)Ly6C(-) stroma',
 'CD106(+)CD16/32(-)Ly6C(+)CD31(+)',
 'CD106(-)CD16/32(+)Ly6C(+)CD31(-)',
 'CD106(-)CD16/32(-)Ly6C(+)CD31(+) stroma',
 'CD11c(+) B cells',
 'CD3(+) other markers (-)',
 'CD31(hi) vascular',
 'CD4(+) T cells',
 'CD4(+)CD8(-)cDC',
 'CD4(+)MHCII(+)',
 'CD4(-)CD8(+)cDC',
 'CD4(-)CD8(-) cDC',
 'CD8(+) T cells',
 'ERTR7(+) stroma',
 'F4/80(+) mphs',
 'FDCs',
 'NK cells',
 'capsule',
 'dirt',
 'erythroblasts',
 'granulocytes',
 'marginal zone mphs',
 'megakaryocytes',
 'noid',
 'plasma cells']
In [8]:
x_dim = 1200
y_dim = 1000

Check available Files

In [9]:
glob('../data/big_data/*.txt')
Out[9]:
['../data/big_data/channelNames_BALBcMRLdataset.txt']
In [10]:
glob('../data/big_data/*.csv')
Out[10]:
['../data/big_data/CODEX_MRLdataset_neighborhood_graph.csv',
 '../data/big_data/BALBcMRLdataset_exposuretimes.csv',
 '../data/big_data/Suppl.Table2.CODEX_paper_MRLdatasetexpression.csv']

Expression Data

See http://welikesharingdata.blob.core.windows.net/forshare/index.html

  • CSV file contains mean marker intensities and X, Y, Z corrdinates for each cell relative to the top left corner of its tile.
  • Tile IDs correspond to tile images sorted in the alphanumeric order
  • Size parameter corresponds to the cell object sizes in voxels
  • "imaging phenotype Cluster ID" column specifies the phenotypic cluster identity as determined by X-shift
  • "niche Cluster ID" column specifies the i-niche cluster determined by K-means (K=100)
  • neighborhood graph files describes which pairs of cells are adjacent to one another
In [11]:
%%time
df['exp'] = pd.read_csv('../data/big_data/Suppl.Table2.CODEX_paper_MRLdatasetexpression.csv')
new_rows = ['C-' + str(x) for x in df['exp'].index.tolist()]
df['exp'].index = new_rows
print(df['exp'].shape)
(734101, 38)
CPU times: user 10.9 s, sys: 596 ms, total: 11.5 s
Wall time: 10.8 s
In [12]:
df['exp'].head()
Out[12]:
Imaging phenotype cluster ID CD45 Ly6C TCR Ly6G CD19 CD169 CD106 CD3 CD1632 ... CD44 NKp46 X.X Y.Y Z.Z MHCII blank_Cy3_cyc15 blank_Cy5_cyc15 sample_Xtile_Ytile niche cluster ID
C-0 9600 1577.675415 -154.301758 130.692184 -4.168493 560.691345 -504.231476 854.670105 -631.294189 385.935242 ... 422.408691 515.130066 10 70 13 6712.812988 1665.967896 398.348389 BALBc-3_X05_Y03 32.0
C-1 9600 1017.838440 -93.069397 144.076584 40.010998 885.595520 -391.357544 62.764454 -474.201172 -469.634583 ... 448.701660 171.880310 1000 294 12 2024.678711 1287.959229 421.991425 BALBc-3_X01_Y02 99.0
C-2 9600 5978.459961 -330.099365 139.631744 -82.840302 1747.897583 -395.508820 954.326782 -1026.204468 3744.718262 ... 2229.804443 512.220764 1003 107 8 8647.193359 2817.173828 709.545105 BALBc-2_X05_Y04 74.0
C-3 9600 6119.109375 -54.384808 -768.871704 25.625927 1065.311890 -485.535431 538.404175 -611.836426 865.842590 ... 665.720459 351.108246 1003 113 13 4838.463379 1646.660278 408.523590 BALBc-2_X03_Y04 98.0
C-4 9600 6272.474609 -235.512405 74.058075 -101.729919 1186.295044 -782.744995 1261.625366 -915.565552 1361.536011 ... 2065.742676 259.003235 1003 148 9 5092.891602 2161.109131 713.416199 BALBc-2_X02_Y01 71.0

5 rows × 38 columns

In [13]:
df['exp'].columns.tolist()
Out[13]:
['Imaging phenotype cluster ID',
 'CD45',
 'Ly6C',
 'TCR',
 'Ly6G',
 'CD19',
 'CD169',
 'CD106',
 'CD3',
 'CD1632',
 'CD8a',
 'CD90',
 'F480',
 'CD11c',
 'Ter119',
 'CD11b',
 'IgD',
 'CD27',
 'CD5',
 'CD79b',
 'CD71',
 'CD31',
 'CD4',
 'IgM',
 'B220',
 'ERTR7',
 'CD35',
 'CD2135',
 'CD44',
 'NKp46',
 'X.X',
 'Y.Y',
 'Z.Z',
 'MHCII',
 'blank_Cy3_cyc15',
 'blank_Cy5_cyc15',
 'sample_Xtile_Ytile',
 'niche cluster ID']

Add Sample Column

In [14]:
sample_list = [x.split('_')[0] for x in list(df['exp']['sample_Xtile_Ytile'].get_values())]
ser_sample = pd.Series(sample_list, name='sample_slide', index=df['exp'].index.tolist())
In [15]:
print(len(sorted(list(set(sample_list)))))
list_slides = sorted(list(set(sample_list)))
list_slides
9
Out[15]:
['BALBc-1',
 'BALBc-2',
 'BALBc-3',
 'MRL-4',
 'MRL-5',
 'MRL-6',
 'MRL-7',
 'MRL-8',
 'MRL-9']
In [16]:
df['exp']['sample_slide'] = ser_sample
In [17]:
ser_sample.value_counts()
Out[17]:
BALBc-1    85572
BALBc-2    85298
BALBc-3    83787
MRL-7      83335
MRL-8      82626
MRL-9      81156
MRL-6      81052
MRL-4      77848
MRL-5      73427
Name: sample_slide, dtype: int64
In [18]:
cols = df['exp'].columns.tolist()
cols
Out[18]:
['Imaging phenotype cluster ID',
 'CD45',
 'Ly6C',
 'TCR',
 'Ly6G',
 'CD19',
 'CD169',
 'CD106',
 'CD3',
 'CD1632',
 'CD8a',
 'CD90',
 'F480',
 'CD11c',
 'Ter119',
 'CD11b',
 'IgD',
 'CD27',
 'CD5',
 'CD79b',
 'CD71',
 'CD31',
 'CD4',
 'IgM',
 'B220',
 'ERTR7',
 'CD35',
 'CD2135',
 'CD44',
 'NKp46',
 'X.X',
 'Y.Y',
 'Z.Z',
 'MHCII',
 'blank_Cy3_cyc15',
 'blank_Cy5_cyc15',
 'sample_Xtile_Ytile',
 'niche cluster ID',
 'sample_slide']
In [19]:
exp_cols = cols[1:30]
exp_cols
Out[19]:
['CD45',
 'Ly6C',
 'TCR',
 'Ly6G',
 'CD19',
 'CD169',
 'CD106',
 'CD3',
 'CD1632',
 'CD8a',
 'CD90',
 'F480',
 'CD11c',
 'Ter119',
 'CD11b',
 'IgD',
 'CD27',
 'CD5',
 'CD79b',
 'CD71',
 'CD31',
 'CD4',
 'IgM',
 'B220',
 'ERTR7',
 'CD35',
 'CD2135',
 'CD44',
 'NKp46']
In [20]:
unique_dict = {}
for inst_col in cols:
    inst_list_unique = list(df['exp'][inst_col].unique())
    unique_dict[inst_col] = inst_list_unique    
    inst_num_unique = len(inst_list_unique)
    print(inst_col, inst_num_unique)
Imaging phenotype cluster ID 58
CD45 726877
Ly6C 731257
TCR 731035
Ly6G 729442
CD19 726981
CD169 730498
CD106 727870
CD3 730526
CD1632 728506
CD8a 728773
CD90 728172
F480 731091
CD11c 725943
Ter119 728879
CD11b 725897
IgD 730293
CD27 727817
CD5 730295
CD79b 727003
CD71 729364
CD31 725039
CD4 730112
IgM 727925
B220 729995
ERTR7 729781
CD35 728492
CD2135 729290
CD44 727003
NKp46 723719
X.X 1342
Y.Y 1006
Z.Z 15
MHCII 729269
blank_Cy3_cyc15 721818
blank_Cy5_cyc15 720065
sample_Xtile_Ytile 565
niche cluster ID 101
sample_slide 9

Select Single Image Tile

BALBc: normal tissue MRL/lpr: spleen from animals with systemic autoimmune disease

Start with: 'BALBc-1_X01_Y01'

In [21]:
df_list = []

for inst_tile in ['BALBc-1_X01_Y01', 'BALBc-1_X02_Y01', 'BALBc-1_X01_Y02', 'BALBc-1_X02_Y02']:
# for inst_tile in ['BALBc-1_X01_Y01', 'BALBc-1_X02_Y01']:
# for inst_tile in ['BALBc-1_X01_Y02', 'BALBc-1_X02_Y02']:    
    keep_rows = []

    ser_tile = df['exp']['sample_Xtile_Ytile']
    ser_found = ser_tile[ser_tile == inst_tile]
    
    keep_rows.extend(ser_found.index.tolist())
    inst_df = df['exp'].loc[keep_rows].transpose()
    print('inst_df', inst_df.shape)
    
    if 'X02' in inst_tile:
        inst_df.loc['X.X'] = inst_df.loc['X.X'] + 1350

    if 'Y01' in inst_tile:        
        inst_df.loc['Y.Y'] = 2000 - inst_df.loc['Y.Y']
    
    if 'Y02' in inst_tile:
        inst_df.loc['Y.Y'] = 1000 - inst_df.loc['Y.Y'] 
        
    df_list.append(inst_df)
    
    
# df['tile'] = df['exp'].loc[keep_rows].transpose()
df['tile'] = pd.concat(df_list, axis=1)
print(df['tile'].shape)

cats = df['tile'].loc['Imaging phenotype cluster ID']
cats = [ct_dict[x] for x in cats]

new_cols = []

cols = df['tile'].columns.tolist()
for index in range(len(cols)):
    
    new_col = (cols[index], 'Cell Type: ' + str(cats[index]))
    new_cols.append(new_col)
df['tile'].columns = new_cols
df['tile'].shape
inst_df (39, 1127)
inst_df (39, 1247)
inst_df (39, 1481)
inst_df (39, 1291)
(39, 5146)
Out[21]:
(39, 5146)

Plot Tile Expression Levels

In [22]:
df['tile-exp-ini'] = df['tile'].loc[exp_cols]
df['tile-exp-ini'].shape
Out[22]:
(29, 5146)

Set Negative Expression Levels to Zero

In [23]:
df['tile-exp'] = df['tile-exp-ini']
df['tile-exp'][df['tile-exp'] < 0] = 0
In [24]:
df['tile-exp'].transpose().describe()
Out[24]:
CD45 Ly6C TCR Ly6G CD19 CD169 CD106 CD3 CD1632 CD8a ... CD71 CD31 CD4 IgM B220 ERTR7 CD35 CD2135 CD44 NKp46
count 5146.0 5146.0 5146.0 5146.0 5146.0 5146.0 5146.0 5146.0 5146.0 5146.0 ... 5146.0 5146.0 5146.0 5146.0 5146.0 5146.0 5146.0 5146.0 5146.0 5146.0
unique 4726.0 2276.0 3872.0 3101.0 4709.0 2543.0 4739.0 1769.0 4661.0 3991.0 ... 3788.0 4900.0 3512.0 4588.0 4188.0 4417.0 4711.0 4118.0 4356.0 4926.0
top 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
freq 421.0 2871.0 1274.0 2046.0 437.0 2604.0 408.0 3378.0 486.0 1156.0 ... 1358.0 244.0 1635.0 559.0 959.0 729.0 436.0 1029.0 791.0 221.0

4 rows × 29 columns

In [25]:
df['tile-exp'][df['tile-exp'] > 5000] = 5000
In [26]:
df['tile-exp'].shape
Out[26]:
(29, 5146)
In [27]:
df['tile'].loc['Z.Z'].head()
Out[27]:
(C-643, Cell Type: B cells)     9
(C-1264, Cell Type: B cells)    9
(C-1589, Cell Type: B cells)    9
(C-1611, Cell Type: B cells)    8
(C-1873, Cell Type: B cells)    6
Name: Z.Z, dtype: object

Location of Cells

In [28]:
df['tile-loc'] = df['tile'].loc[['X.X', 'Y.Y']].transpose()
df['tile-loc'].shape
Out[28]:
(5146, 2)
In [29]:
# df['tile-loc']['Y.Y'] = 1000 - df['tile-loc']['Y.Y'] 
In [30]:
df['tile-loc'] = df['tile-loc'].astype('int')
In [31]:
def set_expression_opacity(inst_gene):
    ser_opacity = df['tile-exp'].loc[inst_gene]
    list_opacity = [float(x/ser_opacity.max()) for x in list(ser_opacity.get_values())]
    scatter.default_opacities = list_opacity
In [32]:
fig = plt.figure(title='Scatter')
def_tt = bqplot.Tooltip(fields=['name'], formats=[''])

scatter = plt.scatter(df['tile-loc']['X.X'], 
                      df['tile-loc']['Y.Y'], 
                      figsize=(20,10), 
                      ylim=(0,1000), 
                      xlim=(0,1000), stroke='black', 
                      tooltip=def_tt, 
                      names=df['tile-loc'].index.tolist(), 
                      display_names=False)

inst_width = 900
fig.layout.min_height = str(inst_width/1.2) + 'px'
fig.layout.min_width = str(inst_width) + 'px'

set_expression_opacity('NKp46')

scatter.default_size = 100
scatter.colors = ['red']

Using RGB to HEX

https://www.rgbtohex.net/

In [33]:
cat_colors = {}
# A 
cat_colors['NK cells'] = '#FB0006'
# B
cat_colors['granulocytes'] = '#FA1400'
# C 
cat_colors['CD4(-)CD8(-) cDC'] = '#FC4B08'
# D 
cat_colors['B220(+) DN T cells'] = '#FD8007'
# E 
cat_colors['plasma cells'] = '#FDBA0A'
# F 
cat_colors['F4/80(+) mphs'] = '#FFF80B'
# G 
cat_colors['FDCs'] = '#FC9CA0'
# H
cat_colors['CD11c(+) B cells'] = '#99FF06'
# I 
cat_colors['capsule'] = '#68FF0A'
# J 
cat_colors['marginal zone mphs'] = '#0A4600'
# K 
cat_colors['noid'] = '#25FF04'
# L 
cat_colors['B cells'] = '#FFFF09'
# M
cat_colors['erythroblasts'] = '#1FFF3C'
# N
cat_colors['CD106(+)CD16/32(+)CD31(+) stroma'] = '#23FF6D'
# O
cat_colors['CD4(-)CD8(+)cDC'] = '#23FFA3'
# P 
cat_colors['CD106(-)CD16/32(-)Ly6C(+)CD31(+) stroma'] = '#20FFDD'
# Q
cat_colors['megakaryocytes'] = '#1CE5FF'
# R 
cat_colors['CD106(-)CD16/32(+)Ly6C(+)CD31(-)'] = '#15A7FF'
# S
cat_colors['CD4(+) T cells'] = '#0D6FFF'
# T
cat_colors['CD4(+)MHCII(+)'] = '#0137FF'
# Ud
cat_colors['CD31(hi) vascular'] = '#0000FF'
# V
cat_colors['CD3(+) other markers (-)'] = '#0700FF'
# W
cat_colors['CD106(+)CD16/32(+)CD31(-)Ly6C(-) stroma'] = '#2D00FF'
# X
cat_colors['CD8(+) T cells'] = '#5900FF'
# Y
cat_colors['ERTR7(+) stroma'] = '#8C00FE'
# Z
cat_colors['CD106(+)CD16/32(-)Ly6C(+)CD31(+)'] = '#C300FF'
# [
cat_colors['CD4(+)CD8(-)cDC'] = '#FB00FA'
# dirt
cat_colors['dirt'] = '#BCBCBC'

Expression Levels

In [34]:
keep_cols = [x for x in df['tile-exp'].columns.tolist() if 'dirt' not in x[1]]
df['tile-exp-clean'] = df['tile-exp'][keep_cols]
df['tile-exp-clean'].shape
Out[34]:
(29, 5018)
In [35]:
net.load_df(df['tile-exp-clean'])
net.set_cat_colors(axis='col', cat_colors=cat_colors, cat_index=1, cat_title='Cell Type')
In [36]:
net.load_df(df['tile-exp-clean'])
net.widget()

Voronoi Plot

In [37]:
from scipy.spatial import Voronoi
vor = Voronoi(df['tile-loc'])
In [38]:
point_list = df['tile-loc'].index.tolist()
point_names = [x[0] for x in point_list]
cat_names   = [x[1].split(': ')[1] for x in point_list]
In [39]:
patch_data = {}
patch_data['x'] = []
patch_data['y'] = []
patch_data['colors'] = []
region_labels = []

region_point_dict = {}
for point_index in range(vor.point_region.shape[0]):
    region_index = vor.point_region[point_index]
    region_point_dict[region_index] = point_index

for region_index in range(len(vor.regions)):
    
    inst_region = vor.regions[region_index]

    if -1 not in inst_region and len(inst_region) > 0:

        point_index = region_point_dict[region_index]
        point_cat = cat_names[point_index]
        region_labels.append(point_cat)
        
        # save cat_colors
        inst_color = cat_colors[point_cat]
        patch_data['colors'].append(inst_color)
        
        x_list = []
        y_list = []
        for inst_vertex in inst_region:
            inst_pos = vor.vertices[inst_vertex]
            x_list.append(inst_pos[0])
            y_list.append(inst_pos[1])
            
        patch_data['x'].append(x_list)
        patch_data['y'].append(y_list)    
In [40]:
import bqplot.pyplot as plt
fig = plt.figure(animation_duration=1000)
patch = plt.plot([], [], 
                 fill='inside',
                 fill_colors=patch_data['colors'],
                 stroke_width=1,
                 close_path=True,
                 labels=region_labels,
                 tooltip=def_tt,
                 axes_options={'x': {'visible': False}, 'y': {'visible': False}},
                )

scatter = plt.scatter(df['tile-loc']['X.X'], 
                      df['tile-loc']['Y.Y'],
#                       tooltip=def_tt, 
                      names=point_names,
                      display_names=False, default_size=2)


inst_width = 950
fig.layout.min_height = str(inst_width/(1.15)) + 'px'
fig.layout.min_width  = str(inst_width) + 'px'

patch.x = patch_data['x']
patch.y = patch_data['y']

plt.xlim(0, 2.0*x_dim)
plt.ylim(0, 2.0*y_dim)

fig
# plt.show()
In [41]:
def mouseover_highlight(self, target):
    # print('cat name', target['data']['name'])
    list_opacities = []
    for inst_label in region_labels:
        inst_opacity = 0.25
        if inst_label == target['data']['name']:
            inst_opacity = 1
        list_opacities.append(inst_opacity)

    self.opacities = list_opacities
In [42]:
def reset_highlight(self, target):
#     print('CLICKING')
    list_opacities = [1 for x in region_labels]
    self.opacities = list_opacities
In [43]:
patch.on_hover(mouseover_highlight)
patch.on_element_click(reset_highlight)
In [ ]: